In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import figure
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import pickle
import sklearn
import scipy
sns.set()
In [10]:
path="Diabetes Prediction.csv"
data = pd.read_csv(path)
data.head()
Out[10]:
| Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
| 1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
| 2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
| 3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
| 4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
In [11]:
data.shape
Out[11]:
(768, 9)
In [12]:
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 768 entries, 0 to 767 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Pregnancies 768 non-null int64 1 Glucose 768 non-null int64 2 BloodPressure 768 non-null int64 3 SkinThickness 768 non-null int64 4 Insulin 768 non-null int64 5 BMI 768 non-null float64 6 DiabetesPedigreeFunction 768 non-null float64 7 Age 768 non-null int64 8 Outcome 768 non-null int64 dtypes: float64(2), int64(7) memory usage: 54.1 KB
In [13]:
data.describe().T
Out[13]:
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Pregnancies | 768.0 | 3.845052 | 3.369578 | 0.000 | 1.00000 | 3.0000 | 6.00000 | 17.00 |
| Glucose | 768.0 | 120.894531 | 31.972618 | 0.000 | 99.00000 | 117.0000 | 140.25000 | 199.00 |
| BloodPressure | 768.0 | 69.105469 | 19.355807 | 0.000 | 62.00000 | 72.0000 | 80.00000 | 122.00 |
| SkinThickness | 768.0 | 20.536458 | 15.952218 | 0.000 | 0.00000 | 23.0000 | 32.00000 | 99.00 |
| Insulin | 768.0 | 79.799479 | 115.244002 | 0.000 | 0.00000 | 30.5000 | 127.25000 | 846.00 |
| BMI | 768.0 | 31.992578 | 7.884160 | 0.000 | 27.30000 | 32.0000 | 36.60000 | 67.10 |
| DiabetesPedigreeFunction | 768.0 | 0.471876 | 0.331329 | 0.078 | 0.24375 | 0.3725 | 0.62625 | 2.42 |
| Age | 768.0 | 33.240885 | 11.760232 | 21.000 | 24.00000 | 29.0000 | 41.00000 | 81.00 |
| Outcome | 768.0 | 0.348958 | 0.476951 | 0.000 | 0.00000 | 0.0000 | 1.00000 | 1.00 |
In [14]:
data_feature = data.columns
for feature in data_feature:
p = sns.distplot(a = data[feature])
plt.show()
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
C:\Users\Lenovo\AppData\Local\Temp\ipykernel_3860\4286188437.py:4: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 p = sns.distplot(a = data[feature])
In [15]:
data_zeros = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
In [16]:
data[data_zeros] = np.where((data[data_zeros] == 0), np.nan, data[data_zeros])
In [17]:
data.isnull().sum()
Out[17]:
Pregnancies 0 Glucose 5 BloodPressure 35 SkinThickness 227 Insulin 374 BMI 11 DiabetesPedigreeFunction 0 Age 0 Outcome 0 dtype: int64
In [18]:
p = data.hist(figsize = (20,20))
In [19]:
data.describe().T
Out[19]:
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Pregnancies | 768.0 | 3.845052 | 3.369578 | 0.000 | 1.00000 | 3.0000 | 6.00000 | 17.00 |
| Glucose | 763.0 | 121.686763 | 30.535641 | 44.000 | 99.00000 | 117.0000 | 141.00000 | 199.00 |
| BloodPressure | 733.0 | 72.405184 | 12.382158 | 24.000 | 64.00000 | 72.0000 | 80.00000 | 122.00 |
| SkinThickness | 541.0 | 29.153420 | 10.476982 | 7.000 | 22.00000 | 29.0000 | 36.00000 | 99.00 |
| Insulin | 394.0 | 155.548223 | 118.775855 | 14.000 | 76.25000 | 125.0000 | 190.00000 | 846.00 |
| BMI | 757.0 | 32.457464 | 6.924988 | 18.200 | 27.50000 | 32.3000 | 36.60000 | 67.10 |
| DiabetesPedigreeFunction | 768.0 | 0.471876 | 0.331329 | 0.078 | 0.24375 | 0.3725 | 0.62625 | 2.42 |
| Age | 768.0 | 33.240885 | 11.760232 | 21.000 | 24.00000 | 29.0000 | 41.00000 | 81.00 |
| Outcome | 768.0 | 0.348958 | 0.476951 | 0.000 | 0.00000 | 0.0000 | 1.00000 | 1.00 |
In [20]:
sns.pairplot(data,diag_kind='kde');
In [21]:
data['Glucose'] = data['Glucose'].fillna(data['Glucose'].mean())
In [22]:
data['BloodPressure'] = data['BloodPressure'].fillna(data['BloodPressure'].mean())
In [23]:
sns.boxplot(y = 'SkinThickness', data = data)
Out[23]:
<Axes: ylabel='SkinThickness'>
In [24]:
data['SkinThickness'].mean(), data['SkinThickness'].median()
Out[24]:
(np.float64(29.153419593345657), np.float64(29.0))
In [25]:
data['SkinThickness'] = data['SkinThickness'].fillna(data['SkinThickness'].median())
In [26]:
data['Insulin'].mean(), data['Insulin'].median()
Out[26]:
(np.float64(155.5482233502538), np.float64(125.0))
In [27]:
data['Insulin'] = data['Insulin'].fillna(data['Insulin'].median())
In [28]:
data['BMI'].mean(), data['BMI'].median()
Out[28]:
(np.float64(32.457463672391015), np.float64(32.3))
In [29]:
data['BMI'] = data['BMI'].fillna(data['BMI'].median())
In [30]:
for i in range(9):
print(data.columns[i])
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
In [31]:
p = data.hist(figsize = (20,20))
In [32]:
sns.pairplot(data =data, hue = 'Outcome')
plt.show()
In [33]:
plt.figure(figsize=(12,10))
sns.heatmap(data.corr(), annot = True, cmap = "YlGnBu")
plt.show()
In [34]:
from scipy import stats
for feature in data.columns:
stats.probplot(data[feature], plot = plt)
plt.title(feature)
plt.show()
In [35]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
In [36]:
data.head()
Out[36]:
| Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | Outcome | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 148.0 | 72.0 | 35.0 | 125.0 | 33.6 | 0.627 | 50 | 1 |
| 1 | 1 | 85.0 | 66.0 | 29.0 | 125.0 | 26.6 | 0.351 | 31 | 0 |
| 2 | 8 | 183.0 | 64.0 | 29.0 | 125.0 | 23.3 | 0.672 | 32 | 1 |
| 3 | 1 | 89.0 | 66.0 | 23.0 | 94.0 | 28.1 | 0.167 | 21 | 0 |
| 4 | 0 | 137.0 | 40.0 | 35.0 | 168.0 | 43.1 | 2.288 | 33 | 1 |
In [37]:
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
In [38]:
X.head()
Out[38]:
| Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | |
|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 148.0 | 72.0 | 35.0 | 125.0 | 33.6 | 0.627 | 50 |
| 1 | 1 | 85.0 | 66.0 | 29.0 | 125.0 | 26.6 | 0.351 | 31 |
| 2 | 8 | 183.0 | 64.0 | 29.0 | 125.0 | 23.3 | 0.672 | 32 |
| 3 | 1 | 89.0 | 66.0 | 23.0 | 94.0 | 28.1 | 0.167 | 21 |
| 4 | 0 | 137.0 | 40.0 | 35.0 | 168.0 | 43.1 | 2.288 | 33 |
In [39]:
y.head()
Out[39]:
0 1 1 0 2 1 3 0 4 1 Name: Outcome, dtype: int64
In [40]:
X.head()
Out[40]:
| Pregnancies | Glucose | BloodPressure | SkinThickness | Insulin | BMI | DiabetesPedigreeFunction | Age | |
|---|---|---|---|---|---|---|---|---|
| 0 | 6 | 148.0 | 72.0 | 35.0 | 125.0 | 33.6 | 0.627 | 50 |
| 1 | 1 | 85.0 | 66.0 | 29.0 | 125.0 | 26.6 | 0.351 | 31 |
| 2 | 8 | 183.0 | 64.0 | 29.0 | 125.0 | 23.3 | 0.672 | 32 |
| 3 | 1 | 89.0 | 66.0 | 23.0 | 94.0 | 28.1 | 0.167 | 21 |
| 4 | 0 | 137.0 | 40.0 | 35.0 | 168.0 | 43.1 | 2.288 | 33 |
In [41]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
In [42]:
def svm_classifier(X_train, X_test, y_train, y_test):
classifier_svm = SVC(kernel = 'rbf', random_state = 0)
classifier_svm.fit(X_train, y_train)
y_pred = classifier_svm.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
return print(f"Train score : {classifier_svm.score(X_train, y_train)}\nTest score : {classifier_svm.score(X_test, y_test)}")
In [43]:
def knn_classifier(X_train, X_test, y_train, y_test):
classifier_knn = KNeighborsClassifier(metric = 'minkowski', p = 2)
classifier_knn.fit(X_train, y_train)
y_pred = classifier_knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
return print(f"Train score : {classifier_knn.score(X_train, y_train)}\nTest score : {classifier_knn.score(X_test, y_test)}")
In [44]:
def naive_classifier(X_train, X_test, y_train, y_test):
classifier_naive = GaussianNB()
classifier_naive.fit(X_train, y_train)
y_pred = classifier_naive.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
return print(f"Train score : {classifier_naive.score(X_train, y_train)}\nTest score : {classifier_naive.score(X_test, y_test)}")
In [45]:
def tree_classifier(X_train, X_test, y_train, y_test):
classifier_tree = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier_tree.fit(X_train, y_train)
y_pred = classifier_tree.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
return print(f"Train score : {classifier_tree.score(X_train, y_train)}\nTest score : {classifier_tree.score(X_test, y_test)}")
In [46]:
def forest_classifier(X_train, X_test, y_train, y_test):
classifier_forest = RandomForestClassifier(criterion = 'entropy', random_state = 0)
classifier_forest.fit(X_train, y_train)
y_pred = classifier_forest.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
return print(f"Train score : {classifier_forest.score(X_train, y_train)}\nTest score : {classifier_forest.score(X_test, y_test)}")
In [47]:
def print_score(X_train, X_test, y_train, y_test):
print("SVM:\n")
svm_classifier(X_train, X_test, y_train, y_test)
print("-"*100)
print()
print("KNN:\n")
knn_classifier(X_train, X_test, y_train, y_test)
print("-"*100)
print()
print("Naive:\n")
naive_classifier(X_train, X_test, y_train, y_test)
print("-"*100)
print()
print("Decision Tree:\n")
tree_classifier(X_train, X_test, y_train, y_test)
print("-"*100)
print()
print("Random Forest:\n")
forest_classifier(X_train, X_test, y_train, y_test)
In [48]:
print_score(X_train, X_test, y_train, y_test)
SVM: Train score : 0.758957654723127 Test score : 0.7922077922077922 ---------------------------------------------------------------------------------------------------- KNN: Train score : 0.8013029315960912 Test score : 0.7662337662337663 ---------------------------------------------------------------------------------------------------- Naive: Train score : 0.745928338762215 Test score : 0.7857142857142857 ---------------------------------------------------------------------------------------------------- Decision Tree: Train score : 1.0 Test score : 0.6883116883116883 ---------------------------------------------------------------------------------------------------- Random Forest: Train score : 1.0 Test score : 0.8116883116883117
In [49]:
classifier_forest = RandomForestClassifier(criterion = 'entropy')
classifier_forest.fit(X_train, y_train)
y_pred = classifier_forest.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cm
Out[49]:
array([[93, 14],
[17, 30]])
In [50]:
pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
Out[50]:
| Predicted | 0 | 1 | All |
|---|---|---|---|
| True | |||
| 0 | 93 | 14 | 107 |
| 1 | 17 | 30 | 47 |
| All | 110 | 44 | 154 |
In [51]:
data['Outcome'].value_counts()
Out[51]:
Outcome 0 500 1 268 Name: count, dtype: int64
In [52]:
from sklearn.metrics import roc_auc_score, roc_curve, classification_report
In [53]:
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.85 0.87 0.86 107
1 0.68 0.64 0.66 47
accuracy 0.80 154
macro avg 0.76 0.75 0.76 154
weighted avg 0.80 0.80 0.80 154
In [54]:
y_pred_prob = classifier_forest.predict_proba(X_test)[:,1]
y_pred_prob
Out[54]:
array([0.91, 0.2 , 0.09, 0.63, 0.07, 0.02, 0.8 , 0.87, 0.26, 0.43, 0.73,
0.79, 0.16, 0.15, 0.35, 0.41, 0.83, 0.01, 0.51, 0.21, 0.67, 0.14,
0.06, 0.24, 0.02, 0.3 , 0.02, 0.86, 0. , 0.09, 0.4 , 0.25, 0.23,
0.65, 0.07, 0.74, 0.43, 0.02, 0.21, 0.7 , 0.23, 0.09, 0.13, 0.84,
0.59, 0.08, 0.07, 0.12, 0.32, 0.23, 0.41, 0.16, 0.84, 0.63, 0.21,
0.05, 0.16, 0.36, 0.25, 0.51, 0.66, 0.7 , 0.05, 0.68, 0.9 , 0.49,
0.66, 0.11, 0.7 , 0.26, 0.05, 0.2 , 0.09, 0.79, 0.89, 0.52, 0.11,
0.6 , 0.3 , 0.28, 0.42, 0.43, 0.12, 0.01, 0.22, 0.15, 0.07, 0.29,
0.9 , 0.09, 0.31, 0.24, 0.11, 0.02, 0.66, 0.13, 0.27, 0.42, 0.39,
0.54, 0.16, 0. , 0.14, 0.04, 0.63, 0.63, 0.1 , 0.62, 0.04, 0.49,
0.02, 0.55, 0.59, 0.42, 0.69, 0.63, 0.05, 0.35, 0.1 , 0.82, 0.38,
0.38, 0.2 , 0.3 , 0.08, 0. , 0.28, 0.43, 0.46, 0.4 , 0.43, 0.35,
0.03, 0.73, 0.22, 0.66, 0.28, 0.67, 0.43, 0.16, 0.06, 0.8 , 0.01,
0.18, 0.75, 0. , 0.07, 0.1 , 0.07, 0.31, 0.06, 0.31, 0.04, 0.19])
In [55]:
fpr, tpr, threshold = roc_curve(y_test, y_pred_prob)
print("FPR:\n\n", fpr)
print("-"*100)
print("TPR:\n\n", tpr)
FPR: [0. 0. 0. 0. 0.00934579 0.00934579 0.00934579 0.02803738 0.02803738 0.02803738 0.05607477 0.05607477 0.05607477 0.06542056 0.07476636 0.09345794 0.11214953 0.11214953 0.11214953 0.12149533 0.13084112 0.13084112 0.14018692 0.1682243 0.18691589 0.22429907 0.22429907 0.24299065 0.24299065 0.27102804 0.27102804 0.28037383 0.30841121 0.31775701 0.34579439 0.34579439 0.36448598 0.37383178 0.39252336 0.41121495 0.42990654 0.48598131 0.5046729 0.55140187 0.57009346 0.57943925 0.59813084 0.60747664 0.6635514 0.71028037 0.72897196 0.77570093 0.80373832 0.8411215 0.86915888 0.87850467 0.93457944 0.96261682 1. ] ---------------------------------------------------------------------------------------------------- TPR: [0. 0.0212766 0.06382979 0.10638298 0.10638298 0.14893617 0.19148936 0.23404255 0.27659574 0.31914894 0.31914894 0.36170213 0.40425532 0.46808511 0.46808511 0.53191489 0.53191489 0.57446809 0.61702128 0.61702128 0.63829787 0.68085106 0.68085106 0.74468085 0.76595745 0.76595745 0.78723404 0.78723404 0.80851064 0.80851064 0.82978723 0.87234043 0.87234043 0.87234043 0.87234043 0.89361702 0.89361702 0.91489362 0.91489362 0.93617021 0.93617021 0.93617021 0.93617021 0.93617021 0.93617021 0.95744681 0.95744681 0.9787234 0.9787234 0.9787234 0.9787234 1. 1. 1. 1. 1. 1. 1. 1. ]
In [56]:
plt.plot([0, 1], [0, 1], "k--", label = '50% AUC')
plt.plot(fpr, tpr, label = "Random Forest")
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("ROC Curve - Random Forest")
plt.show()
In [57]:
roc_auc_score(y_test,y_pred_prob)
Out[57]:
np.float64(0.8583217339431298)
In [58]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier_forest, X = X_train, y = y_train, cv = 10)
print(accuracies.mean(), accuracies.std())
0.7491274457958752 0.04828104302127529
In [59]:
from sklearn.model_selection import GridSearchCV
In [60]:
parameters = {
'n_estimators': [25, 50, 200, 300],
'criterion': ['gini', 'entropy'],
'max_depth': [14, 20, 25, 30]
}
In [61]:
grid_search = GridSearchCV(estimator = classifier_forest,
param_grid = parameters,
scoring = 'accuracy',
cv = 10,
n_jobs = -1)
grid_search = grid_search.fit(X_train, y_train)
print('best_accuracy = ',grid_search.best_score_)
print('best_parameters = ', grid_search.best_params_)
best_accuracy = 0.7606292966684294
best_parameters = {'criterion': 'gini', 'max_depth': 14, 'n_estimators': 300}
In [62]:
classifier_forest = RandomForestClassifier(criterion = 'gini', max_depth = 25, n_estimators = 200, random_state = 0)
classifier_forest.fit(X_train, y_train)
y_pred = classifier_forest.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cm
Out[62]:
array([[94, 13],
[13, 34]])
In [63]:
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.88 0.88 0.88 107
1 0.72 0.72 0.72 47
accuracy 0.83 154
macro avg 0.80 0.80 0.80 154
weighted avg 0.83 0.83 0.83 154
In [64]:
confusion_matrix(y_test, y_pred)
Out[64]:
array([[94, 13],
[13, 34]])
In [ ]: